options(repos = c(CRAN = "https://cran.r-project.org"))
if(!require('pacman')) install.packages('pacman')
## Loading required package: pacman
pacman::p_load(tidyverse, here, janitor, dplyr, knitr, lubridate, reactable)
rm(list = ls())
Dataset manipulation in ‘bl_schisto_long_25052024_BW’.
load("data/bl_schisto_wide_25052024_BW.Rdata")
reactable(bl_schisto_wide_25052024_BW)
library(dplyr)
library(haven)
library(labelled)
bl_schisto_wide_25052024_BW$hiv_status <- as.character(as_factor(bl_schisto_wide_25052024_BW$hiv_status))
bl_schisto_wide_25052024_noCoInf <- bl_schisto_wide_25052024_BW %>%
filter(hiv_status == "HBV")
rm(list = "bl_schisto_wide_25052024_BW")
reactable(bl_schisto_wide_25052024_noCoInf)
# what values do we find in the 'hiv_status' column ?
unique_values <- unique(bl_schisto_wide_25052024_noCoInf$hiv_status)
print(unique_values)
## [1] "HBV"
# patient count
bl_schisto_wide_25052024_noCoInf %>%
arrange(pid) %>%
distinct(pid) %>%
nrow()
## [1] 799
CCA_Fb <- bl_schisto_wide_25052024_noCoInf %>%
select(pid, cca_res, cca_bl_date, fibs_med_res, fibs_iqr_res, cap_med_res, cap_iqr_res) %>%
filter(!is.na(cca_res))
reactable(CCA_Fb)
# checking for duplicates
CCA_Fb %>%
get_dupes
## No variable names specified - using all columns.
## No duplicate combinations found of: pid, cca_res, cca_bl_date, fibs_med_res, fibs_iqr_res, cap_med_res, cap_iqr_res
## # A tibble: 0 × 8
## # ℹ 8 variables: pid <chr>, cca_res <fct>, cca_bl_date <date>,
## # fibs_med_res <dbl>, fibs_iqr_res <dbl>, cap_med_res <dbl>,
## # cap_iqr_res <dbl>, dupe_count <int>
# load dataset before exclusion of HIV/HBV patients
load("data/bl_schisto_wide_25052024_BW.Rdata")
SENB_noCCA <- bl_schisto_wide_25052024_BW %>%
select(pid, enrol_d, cca_res, cca_bl_date, fibs_med_res, fibs_iqr_res, cap_med_res, cap_iqr_res) %>%
rename(fibs_med = fibs_med_res) %>%
rename(fibs_iqr = fibs_iqr_res) %>%
rename(cap_med = cap_med_res) %>%
rename(cap_iqr = cap_iqr_res) %>%
filter(is.na(cca_res))
rm(list = "bl_schisto_wide_25052024_BW")
reactable(SENB_noCCA)
# checking for duplicates
SENB_noCCA %>%
get_dupes
## No variable names specified - using all columns.
## No duplicate combinations found of: pid, enrol_d, cca_res, cca_bl_date, fibs_med, fibs_iqr, cap_med, cap_iqr
## # A tibble: 0 × 9
## # ℹ 9 variables: pid <chr>, enrol_d <date>, cca_res <fct>, cca_bl_date <date>,
## # fibs_med <dbl>, fibs_iqr <dbl>, cap_med <dbl>, cap_iqr <dbl>,
## # dupe_count <int>
patient_list <- as.list(SENB_noCCA$pid)
cat(paste(patient_list, collapse = ", "))
## SENB1082, SENB1205, SENB1212, SENB1225, SENB1227, SENB1241, SENB1275, SENB1280, SENB1281, SENB1492, SENB1496, SENB1512, SENB1536, SENB1541, SENB1555, SENB1608, SENB1632, SENB1705, SENB2001, SENB2003, SENB2006, SENB2008, SENB2009, SENB2010, SENB2011, SENB2014, SENB2015, SENB2016, SENB2017, SENB2018, SENB2019, SENB2021, SENB2022, SENB2024, SENB2025, SENB2026, SENB2028, SENB2029, SENB2030, SENB2031, SENB2032, SENB2033, SENB2034, SENB2035, SENB2038, SENB2040, SENB2041, SENB2042, SENB2043, SENB2044, SENB2045, SENB2049, SENB2050, SENB2051, SENB2053, SENB2054, SENB2056, SENB2092, SENB2106, SENB2107, SENB2108, SENB2109, SENB2111, SENB2112, SENB2116, SENB2121, SENB2124, SENB2146, SENB2148, SENB2149, SENB2150, SENB2151, SENB2152, SENB2153, SENB2154, SENB2155, SENB2156, SENB2157, SENB2158, SENB2159, SENB2160, SENB2161, SENB2162, SENB2163, SENB2164, SENB2165, SENB2166, SENB2167, SENB2168, SENB2174, SENB2175, SENB2176, SENB2177, SENB2178, SENB2179, SENB2180, SENB2181, SENB2182, SENB2183, SENB2184, SENB2185, SENB2186, SENB2187, SENB2188, SENB2189, SENB2190, SENB2191, SENB2192, SENB2193, SENB2194, SENB2195, SENB2196, SENB2197, SENB2198, SENB2199, SENB2200, SENB2201, SENB2202, SENB2203, SENB2204, SENB2205, SENB2206, SENB2207, SENB2208, SENB2209, SENB2217, SENB2218
# import long table
load("data/bl_schisto_long_25052024_BW.Rdata")
# create 'CCA_Fb_long' dataset with variables of interest only
CCA_Fb_long <- bl_schisto_long_25052024_BW %>%
select(pid, enrol_d, test_dt, cca_res, fibs_med, fibs_iqr, cap_med, cap_iqr, hiv_status)
# delete unnecessary datasets
rm(list = "bl_schisto_long_25052024_BW")
# unlabelling 'cca_res'
CCA_Fb_long$cca_res <- as.character(as_factor(CCA_Fb_long$cca_res))
# unlabelling 'hiv_status'
CCA_Fb_long$hiv_status <- as.character(as_factor(CCA_Fb_long$hiv_status))
# create 'df2' dataset with fibs_date
df2 <- CCA_Fb_long %>%
group_by(pid) %>%
mutate(fibs_date = case_when(!is.na(fibs_med) & !is.na(cap_med) ~ test_dt)) %>% # create variable 'fibs_date'
fill(fibs_date, .direction = "downup") %>%
ungroup() %>%
filter(!is.na(fibs_date)) %>%
filter(is.na(cca_res)) %>%
select(-test_dt)
# join both datasets
patient_table <- inner_join(df2, SENB_noCCA) %>%
select(-cca_bl_date) %>%
select(pid, enrol_d, cca_res, hiv_status)
## Joining with `by = join_by(pid, enrol_d, cca_res, fibs_med, fibs_iqr, cap_med,
## cap_iqr)`
reactable(patient_table)
write.csv(patient_table, file = "SENB_noCCA_n127.csv")
# delete unnecessary datasets
rm(list = "df2", "CCA_Fb_long")
SENBmono_noCCA <- bl_schisto_wide_25052024_noCoInf %>%
select(pid, cca_res, cca_bl_date, fibs_med_res, fibs_iqr_res, cap_med_res, cap_iqr_res) %>%
rename(fibs_med = fibs_med_res) %>%
rename(fibs_iqr = fibs_iqr_res) %>%
rename(cap_med = cap_med_res) %>%
rename(cap_iqr = cap_iqr_res) %>%
filter(is.na(cca_res))
reactable(SENBmono_noCCA)
# checking for duplicates
SENBmono_noCCA %>%
get_dupes
## No variable names specified - using all columns.
## No duplicate combinations found of: pid, cca_res, cca_bl_date, fibs_med, fibs_iqr, cap_med, cap_iqr
## # A tibble: 0 × 8
## # ℹ 8 variables: pid <chr>, cca_res <fct>, cca_bl_date <date>, fibs_med <dbl>,
## # fibs_iqr <dbl>, cap_med <dbl>, cap_iqr <dbl>, dupe_count <int>
sociodemo1 <- bl_schisto_wide_25052024_noCoInf %>%
select(1:23) %>%
mutate(country_orig = case_when(is.na(country_orig) ~ "Senegal", # rename all 'NA' values in `country_orig` to 'Senegal'
!is.na(country_orig) ~ country_orig)) %>%
mutate(region_orig = case_when(!is.na(region_orig) ~ region_orig, # rename all 'NA' values in `region_orig` to their country of origin
is.na(region_orig) ~ country_orig)) %>%
mutate(dpt_orig = case_when(!is.na(dpt_orig) ~ dpt_orig, # rename all 'NA' values in `dpt_orig` to their country of origin
is.na(dpt_orig) ~ country_orig)) %>%
select(-sen_yn)
reactable(sociodemo1)
sociodemo2 <- sociodemo1 %>%
select(pid, age_en, sex, country_orig, region_orig, reg_act, employ, other_employ) %>%
mutate(employ = case_when(employ != "other" ~ employ,
employ == "other" ~ str_to_lower(other_employ))) %>%
select(-other_employ)
reactable(sociodemo2)
rm(list = "sociodemo1")
# age
## define median
median_age <- median(sociodemo2$age_en, na.rm = TRUE)
## define categories
age_cat <- sociodemo2 %>%
mutate(age_en = case_when(age_en < 31 ~ "18-31",
age_en >= 31 ~ ">31"))
age_cat %>%
tabyl(age_en) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## age_en n percent
## 18-31 384 48.06%
## >31 415 51.94%
## Total 799 100.00%
# sex
sociodemo2 %>%
tabyl(sex) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## sex n percent
## Male 427 53.44%
## Female 372 46.56%
## Total 799 100.00%
# origin
## define categories
orig_cat <- sociodemo2 %>%
mutate(orig_endemicity = case_when(
region_orig %in% c("Cameroun", "Benin", "Guinea", "Guinea_Bissau", "Ivory_Coast", "Mali", "Mauritania") ~ "Other Country",
region_orig %in% c("Kédougou", "Tambacounda", "Matam", "Saint_Louis", "Louga", "Sédhiou", "Kolda") ~ "≥50% prevalence (high)",
region_orig %in% c("Ziguinchor", "Kaffrine", "Kaolack", "Fatick", "Diurbel", "Thiès") ~ "1-49% prevalence (moderate)",
region_orig == "Dakar" ~ "<1% prevalence (non-endemic)"
))
orig_cat %>%
tabyl(orig_endemicity) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## orig_endemicity n percent
## 1-49% prevalence (moderate) 419 52.44%
## <1% prevalence (non-endemic) 271 33.92%
## Other Country 17 2.13%
## ≥50% prevalence (high) 92 11.51%
## Total 799 100.00%
# reg_act
reg_act_cat <- sociodemo2 %>%
mutate(reg_act = case_when(reg_act == "Dakar" ~ "Dakar",
reg_act != "Dakar" ~ "Outside"))
reg_act_cat %>%
tabyl(reg_act) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## reg_act n percent
## Dakar 642 80.35%
## Outside 157 19.65%
## Total 799 100.00%
# profession ?
rm(list = "age_cat", "orig_cat", "reg_act_cat")
sociodemo_final <- sociodemo2 %>%
mutate(age_en = case_when(
age_en < 31 ~ "18-31",
age_en >= 31 ~ ">31"
)) %>%
mutate(orig_endemicity = case_when(
region_orig %in% c("Cameroun", "Benin", "Guinea", "Guinea_Bissau", "Ivory_Coast", "Mali", "Mauritania") ~ "Other Country",
region_orig %in% c("Kédougou", "Tambacounda", "Matam", "Saint_Louis", "Louga", "Sédhiou", "Kolda") ~ "≥50% prevalence (high)",
region_orig %in% c("Ziguinchor", "Kaffrine", "Kaolack", "Fatick", "Diurbel", "Thiès") ~ "1-49% prevalence (moderate)",
region_orig == "Dakar" ~ "<1% prevalence (non-endemic)"
)) %>%
mutate(reg_act = case_when(reg_act == "Dakar" ~ "Dakar",
reg_act != "Dakar" ~ "Outside")) %>%
rename(age_cat = age_en) %>%
select(pid, age_cat, sex, country_orig, region_orig, orig_endemicity, reg_act, employ)
reactable(sociodemo_final)
rm(list = "sociodemo2")
# recreate variable 'HTA' following definition : Confirmed arterial systolic pressure ≥140 mmHg and/or arterial diastolic pressure ≥90 mmHg, or any anti-hypertensive treatment before inclusion in SEN-B (source : WHO, https://www.who.int/news-room/fact-sheets/detail/hypertension, updated 16 March 2023)
bl_schisto_wide_25052024_noCoInf <- bl_schisto_wide_25052024_noCoInf %>%
mutate(HTA_diff = aHTA_start - enrol_d) %>%
mutate(HTA = case_when(!is.na(HTA_diff) ~ "yes",
tas >= 140 ~ "yes",
tad >= 90 ~ "yes",
TRUE ~ "no")) %>%
select(-HTA_diff)
# recreate variable 'diabete' ?
bl_schisto_wide_25052024_noCoInf %>%
select(diabete, glu_res) %>%
filter(diabete == "yes")
## # A tibble: 22 × 2
## diabete glu_res
## <fct> <dbl>
## 1 yes 1.4
## 2 yes 1.96
## 3 yes 1.69
## 4 yes 0.7
## 5 yes 0.75
## 6 yes 2.2
## 7 yes 0.74
## 8 yes 2.43
## 9 yes 1.63
## 10 yes 0.86
## # ℹ 12 more rows
# create variable 'dyslipidemia' : Total cholesterol >2g/l, HDL-c <0.4 g/l in men and <0.5 g/l in women, triglycerides >1.5 g/l, or LDL-cholesterol >1 g/l (source : https://www.ncbi.nlm.nih.gov/books/NBK560891/, updated 4 March 2024)
bl_schisto_wide_25052024_noCoInf <- bl_schisto_wide_25052024_noCoInf %>%
mutate(dyslipidemia = case_when(colt_res > 2 ~ "yes",
trg_res > 1.5 ~ "yes",
ldl_res > 1 ~ "yes",
sex == "Female" & hdl_res < 0.4 ~ "yes",
sex == "Male" & hdl_res < 0.5 ~ "yes",
TRUE ~ "no"))
# round 'BMI' to only 1 digit
bl_schisto_wide_25052024_noCoInf$BMI <- round(bl_schisto_wide_25052024_noCoInf$BMI, 1)
como1 <- bl_schisto_wide_25052024_noCoInf %>%
select(pid, sex, alcohol_cons, audit_tot, drugs, smoke_yn, HTA, diabete, BMI, dyslipidemia, COL, AVC, renal, cancer, liver_dis_FDR, hcc_FDR)
reactable(como1)
# alcohol consumption
como1 %>%
tabyl(alcohol_cons) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## alcohol_cons n percent
## no 744 93.12%
## yes_now 34 4.26%
## yes_past 21 2.63%
## Total 799 100.00%
# audit-c (https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10163557/)
audit_cat <- como1 %>%
mutate(audit_cat = case_when(sex == "Female" & audit_tot >= 3 ~ "Misuse",
sex == "Male" & audit_tot >= 4 ~ "Misuse",
audit_tot >= 10 ~ "Addiction",
TRUE ~ "Negative"))
audit_cat %>%
tabyl(audit_cat) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## audit_cat n percent
## Misuse 15 1.88%
## Negative 784 98.12%
## Total 799 100.00%
# drugs
como1 %>%
tabyl(drugs) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## drugs n percent
## no 786 98.37%
## yes_now 0 0.00%
## yes_past 13 1.63%
## Total 799 100.00%
# smoking
como1 %>%
tabyl(smoke_yn) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## smoke_yn n percent
## no 710 88.86%
## yes_now 18 2.25%
## yes_past 71 8.89%
## Total 799 100.00%
# HTA
como1 %>%
tabyl(HTA) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## HTA n percent
## no 607 75.97%
## yes 192 24.03%
## Total 799 100.00%
# diabete
como1 %>%
tabyl(diabete) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## diabete n percent
## no 777 97.25%
## yes 22 2.75%
## Total 799 100.00%
# BMI (https://www.ncbi.nlm.nih.gov/books/NBK541070/)
BMI_cat <- como1 %>%
mutate(BMI_cat = case_when(BMI < 18.5 ~ "underweight",
BMI >= 18.5 & BMI < 25 ~ "normal weight",
BMI >= 25 & BMI < 30 ~ "overweight",
BMI >= 30 ~ "obesity"))
BMI_cat %>%
tabyl(BMI_cat) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## BMI_cat n percent valid_percent
## normal weight 459 57.45% 58.03%
## obesity 59 7.38% 7.46%
## overweight 156 19.52% 19.72%
## underweight 117 14.64% 14.79%
## <NA> 8 1.00% -
## Total 799 100.00% 100.00%
# dyslipidemia
como1 %>%
tabyl(dyslipidemia) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## dyslipidemia n percent
## no 482 60.33%
## yes 317 39.67%
## Total 799 100.00%
# variable 'COL' (?)
como1 %>%
tabyl(COL) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## COL n percent
## no 796 99.62%
## yes 3 0.38%
## Total 799 100.00%
# AVC
como1 %>%
tabyl(AVC) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## AVC n percent
## no 799 100.00%
## yes 0 0.00%
## Total 799 100.00%
# variable 'renal' (?)
como1 %>%
tabyl(renal) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## renal n percent
## no 799 100.00%
## yes 0 0.00%
## Total 799 100.00%
# Cancer
como1 %>%
tabyl(cancer) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## cancer n percent
## no 798 99.87%
## yes 1 0.13%
## Total 799 100.00%
# Liver Disease
como1 %>%
tabyl(liver_dis_FDR) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## liver_dis_FDR n percent
## no 784 98.12%
## yes 15 1.88%
## Total 799 100.00%
# CHC Familial
como1 %>%
tabyl(hcc_FDR) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## hcc_FDR n percent
## no 698 87.36%
## yes 101 12.64%
## Total 799 100.00%
# supprimer les objets crées de l'environnement
rm(list = "audit_cat", "BMI_cat")
# supprimer les variables peu intéressantes après l'anaylse descriptive
como2 <- como1 %>%
select(pid, sex, alcohol_cons, audit_tot, drugs, smoke_yn, HTA, diabete, BMI, dyslipidemia, liver_dis_FDR, hcc_FDR) %>%
mutate(audit = case_when(sex == "Female" & audit_tot >= 3 ~ "Misuse",
sex == "Male" & audit_tot >= 4 ~ "Misuse",
audit_tot >= 10 ~ "Addiction",
TRUE ~ "Negative")) %>%
mutate(BMI = case_when(BMI < 18.5 ~ "underweight",
BMI >= 18.5 & BMI < 25 ~ "normal weight",
BMI >= 25 & BMI < 30 ~ "overweight",
BMI >= 30 ~ "obesity")) %>%
select(pid, sex, alcohol_cons, audit, drugs, smoke_yn, HTA, diabete, BMI, dyslipidemia, liver_dis_FDR, hcc_FDR)
reactable(como2)
rm(list = "como1")
sociodemo_como <- left_join(sociodemo_final, como2, by = "pid") %>%
rename(sex = sex.x) %>%
select(-sex.y)
# delete unnecessary tables
rm(list = "sociodemo_final", "como2")
I forgot to add the “dipstick EtG” variable…
bl_schisto_wide_25052024_noCoInf
## # A tibble: 799 × 224
## pid hiv_status enrol_d center med sen_yn country_orig region_orig
## <chr> <chr> <date> <fct> <chr> <fct> <fct> <fct>
## 1 SENB1002 HBV 2019-09-30 SMIT 1 yes <NA> Fatick
## 2 SENB1003 HBV 2019-10-01 SMIT 1 yes <NA> Dakar
## 3 SENB1004 HBV 2019-10-01 SMIT 1 yes <NA> Dakar
## 4 SENB1005 HBV 2019-10-01 SMIT 1 yes <NA> Dakar
## 5 SENB1006 HBV 2019-10-01 SMIT 1 yes <NA> Dakar
## 6 SENB1007 HBV 2019-10-02 SMIT 1 yes <NA> Ziguinchor
## 7 SENB1008 HBV 2019-10-02 SMIT 1 yes <NA> Kaolack
## 8 SENB1009 HBV 2019-10-02 SMIT 1 yes <NA> Louga
## 9 SENB1010 HBV 2019-10-02 SMIT 1 yes <NA> Diurbel
## 10 SENB1011 HBV 2019-10-02 SMIT 1 no Mali <NA>
## # ℹ 789 more rows
## # ℹ 216 more variables: dpt_orig <fct>, res_time <dbl>, reg_act <fct>,
## # dpt_act <fct>, birth_d <date>, age_en <dbl>, sex <fct>, rank <dbl>,
## # marital <fct>, union <fct>, child <dbl>, education <fct>, employ <fct>,
## # other_employ <chr>, health_ins <fct>, hiv_yn <fct>, type_HIV <chr>,
## # hiv_test1 <fct>, hiv_test1_date <date>, hiv_test2 <fct>,
## # hiv_test2_date <date>, CD4_last <dbl>, CD4_last_date <date>, …
# create table with 'pid' and 'etg_res'
dipstick_etg <- bl_schisto_wide_25052024_noCoInf %>%
select(pid, etg_res)
# join 'dipstick_etg' with sociodemo_como
sociodemo_como <- left_join(sociodemo_como, dipstick_etg, by = "pid") %>%
select(pid, age_cat, sex, country_orig, region_orig, orig_endemicity, reg_act, employ, alcohol_cons, audit, etg_res,
drugs, smoke_yn, HTA, diabete, BMI, dyslipidemia, liver_dis_FDR, hcc_FDR)
# delete 'dipstick_etg' table
rm(list = "dipstick_etg")
reactable(sociodemo_como)
hbv_markers <- bl_schisto_wide_25052024_noCoInf %>%
select(pid, enrol_d, cvb_res, AgHBs_res, qAgHBs_res, AgHBe_res, AcHBe_res)
# should I add AcHBs_res too ? lots of missing data (n=132)
bl_schisto_wide_25052024_noCoInf %>%
tabyl(AcHBs_res)
## AcHBs_res n percent valid_percent
## Positive 8 0.01001252 0.011994
## Negative 659 0.82478098 0.988006
## <NA> 132 0.16520651 NA
# create variable "screen_d" with just the year
hbv_markers <- hbv_markers %>%
mutate(screen_d = year(enrol_d)) %>%
select(-enrol_d) %>%
select(pid, screen_d, cvb_res, AgHBs_res, qAgHBs_res, AgHBe_res, AcHBe_res)
# categorize variable 'cvb_res' : <20, 20-2000, >2000 IU/mL (source : Hofmann et al., Hepatitis B Virus (HBV) Replication During Tenofovir Therapy Is Frequent in Human Immunodeficiency Virus/HBV Coinfection, Clinical Infectious Diseases, Volume 76, Issue 4, 15 February 2023, Pages 730–733, https://doi.org/10.1093/cid/ciac823)
hbv_markers <- hbv_markers %>%
mutate(cvb_detection = case_when(cvb_res < 20 ~ "HBV Suppression",
cvb_res >2000 ~ "High-level Viremia",
TRUE ~ "Low-level Viremia")) %>%
select(pid, screen_d, cvb_res, cvb_detection, AgHBs_res, qAgHBs_res, AgHBe_res, AcHBe_res)
# categorize variable 'qAgHBs_res' : <100, 100-1000, >1000 (source : Ouzan, 2014)
hbv_markers <- hbv_markers %>%
mutate(qAgHBs_cat = case_when(qAgHBs_res < 100 ~ "<100 IU/mL",
qAgHBs_res > 1000 ~ ">1000 IU/mL",
TRUE ~ "100-1000 IU/mL")) %>%
select(pid, screen_d, cvb_res, cvb_detection, AgHBs_res, qAgHBs_res, qAgHBs_cat, AgHBe_res, AcHBe_res)
reactable(hbv_markers)
# Screening date
hbv_markers %>%
tabyl(screen_d) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## screen_d n percent
## 2019 82 10.26%
## 2020 59 7.38%
## 2021 409 51.19%
## 2022 243 30.41%
## 2023 6 0.75%
## Total 799 100.00%
# HBV DNA viral load
hbv_markers %>%
tabyl(cvb_detection) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## cvb_detection n percent
## High-level Viremia 214 26.78%
## Low-level Viremia 585 73.22%
## Total 799 100.00%
# AgHBs
hbv_markers %>%
tabyl(AgHBs_res) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## AgHBs_res n percent
## Positive 781 97.75%
## Negative 18 2.25%
## Total 799 100.00%
# qAgHBs
hbv_markers %>%
tabyl(qAgHBs_cat) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## qAgHBs_cat n percent
## 100-1000 IU/mL 101 12.64%
## <100 IU/mL 71 8.89%
## >1000 IU/mL 627 78.47%
## Total 799 100.00%
# AgHbe
hbv_markers %>%
tabyl(AgHBe_res) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## AgHBe_res n percent
## Positive 19 2.38%
## Negative 780 97.62%
## Total 799 100.00%
# AcHBe
hbv_markers %>%
tabyl(AcHBe_res) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## AcHBe_res n percent
## Positive 766 95.87%
## Negative 33 4.13%
## Total 799 100.00%
SchistoHBV <- left_join(sociodemo_como, hbv_markers, by = "pid") %>%
select(pid, screen_d, age_cat, sex, country_orig, region_orig,
orig_endemicity, reg_act, employ, alcohol_cons, audit,
etg_res, drugs, smoke_yn, HTA, diabete, BMI, dyslipidemia,
liver_dis_FDR, hcc_FDR, cvb_detection, AgHBs_res,
qAgHBs_cat, AgHBe_res, AcHBe_res)
rm(list = "sociodemo_como", "hbv_markers")
reactable(SchistoHBV)
hcv_hdv_markers <- bl_schisto_wide_25052024_noCoInf %>%
select(pid, AcVHC_res, AcVHD_res)
reactable(hcv_hdv_markers)
# HCV
hcv_hdv_markers %>%
tabyl(AcVHC_res) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## AcVHC_res n percent
## Positive 2 0.25%
## Negative 797 99.75%
## Total 799 100.00%
# HDV
hcv_hdv_markers %>%
tabyl(AcVHD_res) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## AcVHD_res n percent valid_percent
## Positive 10 1.25% 1.25%
## Negative 787 98.50% 98.75%
## <NA> 2 0.25% -
## Total 799 100.00% 100.00%
TDF_ttt <- bl_schisto_wide_25052024_noCoInf %>%
select(pid, enrol_d, on_TDF_bl, TDF_start) %>%
mutate(on_TDF_bl = case_when(is.na(on_TDF_bl) ~ "No",
on_TDF_bl == "FALSE" ~"No",
on_TDF_bl == "TRUE" ~ "Yes")) %>%
mutate(TDF_duration = case_when(on_TDF_bl == "No" ~ 0,
on_TDF_bl == "Yes" ~ as.numeric(TDF_start - enrol_d))) %>%
select(-TDF_start) %>%
select(-enrol_d)
reactable(TDF_ttt)
TDF_ttt %>%
tabyl(on_TDF_bl) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## on_TDF_bl n percent
## No 749 93.74%
## Yes 50 6.26%
## Total 799 100.00%
# table of patients treated before baseline
patients_TDF_bl <- TDF_ttt %>%
filter(on_TDF_bl == "Yes")
# mean TDF duration at baseline
summary_TDF_duration <- patients_TDF_bl %>%
summarise(
mean_TDF_duration = mean(abs(as.numeric(TDF_duration)), na.rm = TRUE),
CI95 = t.test(abs(as.numeric(TDF_duration)), conf.level = 0.95, na.rm = TRUE)$conf.int
)
## Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in
## dplyr 1.1.0.
## ℹ Please use `reframe()` instead.
## ℹ When switching from `summarise()` to `reframe()`, remember that `reframe()`
## always returns an ungrouped data frame and adjust accordingly.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
mean_TDF_duration <- summary_TDF_duration$mean_TDF_duration
CI95_lower <- summary_TDF_duration$CI95[1]
CI95_upper <- summary_TDF_duration$CI95[2]
mean_TDF_duration
## [1] 636.94 636.94
CI95_lower
## [1] 339.4184
CI95_upper
## [1] 934.4616
rm(list = "summary_TDF_duration", "patients_TDF_bl")
df <- left_join(hcv_hdv_markers, TDF_ttt, by = "pid")
SchistoHBV <- left_join(SchistoHBV, df, by = "pid") %>%
mutate(employ = str_to_title(employ),
alcohol_cons = str_to_title(alcohol_cons),
drugs = str_to_title(drugs),
smoke_yn = str_to_title(smoke_yn),
HTA = str_to_title(HTA),
diabete = str_to_title(diabete),
BMI = str_to_title(BMI),
dyslipidemia = str_to_title(dyslipidemia),
liver_dis_FDR = str_to_title(liver_dis_FDR),
hcc_FDR = str_to_title(hcc_FDR))
reactable(SchistoHBV)
rm(list = "df", "TDF_ttt", "hcv_hdv_markers")
liver_markers <- bl_schisto_wide_25052024_noCoInf %>%
select(pid, sex, alt_res, ast_res, pal_res, ggt_res, plt_res, blb_res, tp_res, alb_res, fibs_med_res, cap_med_res) %>%
mutate(apri_score = ((ast_res/40)*100)/plt_res*1000) %>%
mutate(apri_score = round(apri_score, 2)) %>%
# recategorize variables
mutate(alt_res = case_when(sex == "Male" & alt_res > 50 ~ "High",
sex == "Female" & alt_res > 40 ~ "High",
TRUE ~ "Normal"),
apri_diag = case_when(apri_score > 1.5 ~ "Severe Fibrosis",
apri_score < 0.5 ~ "No Fibrosis",
is.na(apri_score) ~ "NA",
TRUE ~ "Mild Fibrosis"),
pal_res = case_when(pal_res > 300 ~ ">300 IU/L", # liver disease
is.na(pal_res) ~ "NA",
TRUE ~ "Normal"),
ggt_res = case_when(ggt_res > 50 ~ ">50 IU/L",
is.na(ggt_res) ~ "NA",
TRUE ~ "Normal"),
blb_res = case_when(blb_res > 1.1 ~ ">1.1 mg/L",
is.na(blb_res) ~ "NA",
TRUE ~ "Normal"),
tp_res = case_when(tp_res < 60 ~ "<60 g/L", # liver or kidney disease
is.na(tp_res) ~ "NA",
TRUE ~ "Normal"),
alb_res = case_when(alb_res < 35 ~ "<35 g/L", # liver disease, Crohn disease
is.na(alb_res) ~ "NA",
TRUE ~ "Normal"),
fibs_med_diag = case_when(fibs_med_res >= 7.9 ~ "F2-4 (fibrosis or cirrhosis)",
is.na(fibs_med_res) ~ "NA",
TRUE ~ "F0-1 (no or mild fibrosis)"),
cap_med_diag = case_when(cap_med_res >= 248 ~ "SLD",
is.na(cap_med_res) ~ "NA",
TRUE ~ "Normal")
) %>%
select(pid, alt_res, pal_res, ggt_res, blb_res, tp_res, alb_res, apri_score, fibs_med_res, apri_diag, fibs_med_diag, cap_med_res, cap_med_diag)
# SOURCES : Lala V, Zubair M, Minter DA. Liver Function Tests. [Updated 2023 Jul 30]. In: StatPearls [Internet]. Treasure Island (FL): StatPearls Publishing; 2024 Jan-. Available from: https://www.ncbi.nlm.nih.gov/books/NBK482489
## APRI & FIB-4 : Lee J, Vali Y, Boursier J, Spijker R, Anstee QM, Bossuyt PM, Zafarmand MH. Prognostic accuracy of FIB-4, NAFLD fibrosis score and APRI for NAFLD-related events: A systematic review. Liver Int. 2021 Feb;41(2):261-270. doi: 10.1111/liv.14669. PMID: 32946642; PMCID: PMC7898346.
## Fibrosis : Johannessen, A., Stockdale, A.J., Henrion, M.Y.R. et al. Systematic review and individual-patient-data meta-analysis of non-invasive fibrosis markers for chronic hepatitis B in Africa. Nat Commun 14, 45 (2023). https://doi.org/10.1038/s41467-022-35729-w
## Steatosis : Karlas, T., et al., Individual patient data meta-analysis of controlled attenuation parameter (CAP) technology for assessing steatosis. J Hepatol, 2017. 66(5): p. 1022-1030.
reactable(liver_markers)
# ALAT
liver_markers %>%
tabyl(alt_res) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## alt_res n percent
## High 37 4.63%
## Normal 762 95.37%
## Total 799 100.00%
# PAL
liver_markers %>%
tabyl(pal_res) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## pal_res n percent
## >300 IU/L 17 2.13%
## NA 4 0.50%
## Normal 778 97.37%
## Total 799 100.00%
# GGT
liver_markers %>%
tabyl(ggt_res) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## ggt_res n percent
## >50 IU/L 22 2.75%
## NA 4 0.50%
## Normal 773 96.75%
## Total 799 100.00%
# Total Bilirubin
liver_markers %>%
tabyl(blb_res) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## blb_res n percent
## >1.1 mg/L 68 8.51%
## NA 90 11.26%
## Normal 641 80.23%
## Total 799 100.00%
# Total Protein
liver_markers %>%
tabyl(tp_res) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## tp_res n percent
## <60 g/L 1 0.13%
## NA 713 89.24%
## Normal 85 10.64%
## Total 799 100.00%
# Albumin
liver_markers %>%
tabyl(alb_res) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## alb_res n percent
## <35 g/L 69 8.64%
## NA 93 11.64%
## Normal 637 79.72%
## Total 799 100.00%
# APRI
liver_markers %>%
tabyl(apri_diag) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## apri_diag n percent
## Mild Fibrosis 30 3.75%
## No Fibrosis 761 95.24%
## Severe Fibrosis 8 1.00%
## Total 799 100.00%
# FibroScan LSM
liver_markers %>%
tabyl(fibs_med_diag) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## fibs_med_diag n percent
## F0-1 (no or mild fibrosis) 731 91.49%
## F2-4 (fibrosis or cirrhosis) 68 8.51%
## Total 799 100.00%
# FibroScan CAP
liver_markers %>%
tabyl(cap_med_diag) %>%
adorn_totals("row") %>%
adorn_pct_formatting(digits = 2)
## cap_med_diag n percent
## Normal 731 91.49%
## SLD 68 8.51%
## Total 799 100.00%
# Remove 'Bilirubin' (n=90), 'Total Protein' (n=713) and 'Albumin' (n=93) because of too many NAs
liver_markers <- liver_markers %>%
select(-blb_res, -tp_res, -alb_res)
discordant_patients <- liver_markers %>%
filter((apri_diag == "No or Mild Fibrosis" & fibs_med_diag == "F2-4 (fibrosis or cirrhosis)") |
(apri_diag == "Severe Fibrosis" & fibs_med_diag == "F0-1 (no or mild fibrosis)") |
(apri_diag != "Mild Fibrosis" & apri_diag != "Severe Fibrosis" & apri_diag != "No Fibrosis") |
(fibs_med_diag != "F0-1 (no or mild fibrosis)" & fibs_med_diag != "F2-4 (fibrosis or cirrhosis)"))
# Print discordant patients (n=2)
reactable(discordant_patients)
bl_schisto_wide_25052024_noCoInf %>%
filter(pid %in% c("SENB1342", "SENB1628")) #...
## # A tibble: 2 × 224
## pid hiv_status enrol_d center med sen_yn country_orig region_orig
## <chr> <chr> <date> <fct> <chr> <fct> <fct> <fct>
## 1 SENB1342 HBV 2021-09-01 SMIT 6 yes <NA> Thiès
## 2 SENB1628 HBV 2022-05-09 SMIT 6 yes <NA> Dakar
## # ℹ 216 more variables: dpt_orig <fct>, res_time <dbl>, reg_act <fct>,
## # dpt_act <fct>, birth_d <date>, age_en <dbl>, sex <fct>, rank <dbl>,
## # marital <fct>, union <fct>, child <dbl>, education <fct>, employ <fct>,
## # other_employ <chr>, health_ins <fct>, hiv_yn <fct>, type_HIV <chr>,
## # hiv_test1 <fct>, hiv_test1_date <date>, hiv_test2 <fct>,
## # hiv_test2_date <date>, CD4_last <dbl>, CD4_last_date <date>,
## # hiv_arn_last <dbl>, hiv_arn_less <chr>, hiv_arn_date <date>, who <dbl>, …
SchistoHBV <- left_join(SchistoHBV, liver_markers, by = "pid")
reactable(SchistoHBV)
urine_markers <- bl_schisto_wide_25052024_noCoInf %>%
select(pid, protu_res, creu_res, albu_res, leubu_res, hembu_res, protbu_res, glubu_res)
# create 'protein/creatinine ratio' variable (https://doi.org/10.1080/10408363.2020.1723487)
urine_markers <- urine_markers %>%
mutate(upcr_ratio = protu_res / creu_res) %>%
mutate(upcr_ratio = round(upcr_ratio, 2)) %>%
mutate(upcr_diag = case_when(upcr_ratio > 0.2 ~ "Proteinuria (>0.2 mg/mg)",
TRUE ~ "Normal (<0.2 mg/mg)")) %>%
select(pid, protu_res, creu_res, upcr_ratio, upcr_diag, albu_res, leubu_res, hembu_res, protbu_res, glubu_res)
# create 'albumin/creatinine ratio' variable (https://doi.org/10.1097/JU.0000000000001005)
urine_markers <- urine_markers %>%
mutate(uacr_ratio = albu_res / creu_res) %>%
mutate(uacr_ratio = round(uacr_ratio, 2)) %>%
mutate(uacr_diag = case_when(uacr_ratio > 0.3 ~ "Severe Albuminuria (>0.3 mg/mg)",
uacr_ratio <= 0.03 ~ "Normal (<0.03 mg/mg)",
TRUE ~ "Moderate Albuminuria (0.03-0.3 mg/mg)")) %>%
select(pid, protu_res, creu_res, upcr_ratio, upcr_diag, albu_res, uacr_ratio, uacr_diag,
leubu_res, hembu_res, protbu_res, glubu_res)
reactable(urine_markers)
bl_schisto_wide_25052024_noCoInf
## # A tibble: 799 × 224
## pid hiv_status enrol_d center med sen_yn country_orig region_orig
## <chr> <chr> <date> <fct> <chr> <fct> <fct> <fct>
## 1 SENB1002 HBV 2019-09-30 SMIT 1 yes <NA> Fatick
## 2 SENB1003 HBV 2019-10-01 SMIT 1 yes <NA> Dakar
## 3 SENB1004 HBV 2019-10-01 SMIT 1 yes <NA> Dakar
## 4 SENB1005 HBV 2019-10-01 SMIT 1 yes <NA> Dakar
## 5 SENB1006 HBV 2019-10-01 SMIT 1 yes <NA> Dakar
## 6 SENB1007 HBV 2019-10-02 SMIT 1 yes <NA> Ziguinchor
## 7 SENB1008 HBV 2019-10-02 SMIT 1 yes <NA> Kaolack
## 8 SENB1009 HBV 2019-10-02 SMIT 1 yes <NA> Louga
## 9 SENB1010 HBV 2019-10-02 SMIT 1 yes <NA> Diurbel
## 10 SENB1011 HBV 2019-10-02 SMIT 1 no Mali <NA>
## # ℹ 789 more rows
## # ℹ 216 more variables: dpt_orig <fct>, res_time <dbl>, reg_act <fct>,
## # dpt_act <fct>, birth_d <date>, age_en <dbl>, sex <fct>, rank <dbl>,
## # marital <fct>, union <fct>, child <dbl>, education <fct>, employ <fct>,
## # other_employ <chr>, health_ins <fct>, hiv_yn <fct>, type_HIV <chr>,
## # hiv_test1 <fct>, hiv_test1_date <date>, hiv_test2 <fct>,
## # hiv_test2_date <date>, CD4_last <dbl>, CD4_last_date <date>, …